$title  demand.GMS

* ###################################

* Per capita income, consumption patterns, and CO2 emissions

* Journal of the Association of Environmental and Resource Economists

* Justin Caron and Thibault Fally

* April 2021

*###################################


* ESTIMATE DEMAND SYSTEM PARAMETERS - NH CES DEMAND SYSTEM - QUADRATIC VERSION

$set SLASH \
$if %system.filesys% == UNIX $set SLASH /


$if not set datadir $set datadir "data%SLASH%"
$setglobal datadir %datadir%


* ---------------------------------------------------------------------------------------------
* OPTIONS :

* SPECIFICATION ?
* choices : tc, theta4
$if not set spec $set spec theta4

* subset of regions to include
$if not set regsubset $set regsubset rall

* MINIMIZE ERRORS ON LOGS OR CONSUMPTION SHARES ?
* choices : log, consshare, logweighted
$ if not set objective $set objective logweighted

* load gravity data from stata or gams ?
* choices : stata, gams
$ if not set gravitydata $set gravitydata gams

* skip reporting ?
* choices : yes, no
$ if not set skipreporting $set skipreporting yes


* BOOSTRAP ?
$if not set boot $set boot no
* set nb of boostrap iterations:
$if not set itr $set itr 4



$if not set ds $set ds gtap8gas
$include loaddata_gtap8.gms


* ------------------------------------
* IMPORT  GRAVITY ESTIMATES

parameter       coeffs stores all sector-specific coefficients
                phiest
                tcostest
                logphiest
                IM
                EX
                cst;


$gdxin estimates\gravityestimates_%ds%.gdx
$load  coeffs phiest im ex cst tcostest

parameter esttheta;
esttheta(i) = 0;

display coeffs;


* --  DEFINE SECTORS WHICH ARE MOSTLY INTERMEDIATES

parameter sharefd % FD in vdm (dom output);
set intermediates(i);

sharefd(i) = (sum((r,g), vdfm(i,g,r) + vifm(i,g,r)) - sum((j,r), vdfm(i,j,r) + vifm(i,j,r)))/ sum((r,g), vdfm(i,g,r) + vifm(i,g,r));

* defined as "intermediate" if  less than 10 % of production goes to final demand
intermediates(i)$(sharefd(i)<0.1) = yes;
display sharefd, intermediates;

* -- DEFINE SERVICE AND TRADABLE SECTORS SECTORS

*set     serv(i) service sectors / CMN, DWE,ISR,OBS, OFI,OSG,ROS,WTR,TRD,CNS, OTP, ATP, WTP, GDT, ELY/
set     serv(i) service sectors /osg/
        tradables(i) the tradable sectors;

tradables(i) = yes;
tradables(serv) = no;
display tradables;


* -----------------------------------------------------------
* NLLS Demand estimations
* -----------------------------------------------------------
* the only exogenous parameters needed are: x(i,r), per capita expenditure, w(n) = wage = PCI


parameter       x(i,r)          per capita expenditure
                w(r)            wage = PCI
                indexp(i)       industry total expenditure,
                logphi,
                phi
                bhat;

set i_(i), r_(r),g_(g),
        rall(r) set of 94 regions;
i_(i) = no;



* -- SELECT WHICH PHI TO USE HERE
*$if "%gravitydata%" == "stata" logphi(i,r) = importdata(i,r,"log_Phi_v2");
*$if "%gravitydata%" == "gams"  phi(i,r) = phiest(i,r);


$if "%gravitydata%" == "gams"  phi(i,r) = phiest(i,r);



* -- SELECT WHICH SECTORS TO USE HERE :

* using all sectors for which we have estimates of PHI :
i_(i)$sum(r,phi(i,r))= yes;

* selecting regions :
rall(r)$sum(i,phi(i,r)) = yes;


* use this parameter in excel to get this set of regions :
set rskm(r) regions with skill ratio ensowments within the 2 middle quartiles
   /MYS, VNM, GEO,KAZ, EGY, ALB, PAK, NGA, SEN, THA, PHL, TUR, ZWE, LTU, COL, LVA, SVK, KOR, TUN, ARG, BOL, HRV, HUN, VEN, MLT, RUS, CZE, SVN, CHL, UKR, MUS, CRI, PAN, IRN, EST, POL,
  NZL, CYP, URY, BRA, ZAF, SGP, CAN, BWA, BGR/ ;
* note, added bgr to estimate median income country elasticities

* other sets :

set red(r) / ALB, ARM, AZE, BGD, BGR, BLR, BOL, CHN, COL, ECU, EGY, GEO, GTM, IDN, IND, KAZ, KGZ, LKA, MAR, MYS, NGA, NIC, PAK, PER, PHL, PRY, ROU, SEN, THA, TUR, UKR, VNM, ZMB, ZWE /;

set blue(r) / ARG, BRA, BWA, CHL, CRI, CYP, CZE, EST, HRV, HUN, IRN, KOR, LTU, LVA, MEX, MLT, MUS, NZL, PAN, POL, RUS, SVK, SVN, TUN, URY, VEN, ZAF /;

set green(r) / AUS, AUT, BEL, CAN, CHE, DEU, ESP, FIN, FRA, GBR, GRC, HKG, IRL, ITA, JPN, NLD, PRT, SGP, SWE, TWN /;


set nored(r) / ARG, AUS, AUT, BEL, BRA, BWA, CAN, CHE, CHL, CRI, CYP, CZE, DEU, DNK, ESP, EST, ETH, FIN, FRA, GBR,
GRC, HKG, HRV, HUN, IRL, IRN, ITA, JPN, KHM, KOR, LAO, LTU, LUX, LVA, MDG, MEX, MLT,  MOZ, MUS, MWI, NLD, NOR,
NZL, PAN, POL, PRT, RUS, SGP, SVK, SVN, SWE, TUN, TWN, TZA, UGA, URY, USA, VEN, ZAF /;


set noblue(r) / ALB, ARM, AUS, AUT, AZE, BEL, BGD, BGR, BLR, BOL, CAN, CHE, CHN, COL, DEU, DNK, ECU, EGY, ESP, ETH,
FIN, FRA, GBR, GEO, GRC, GTM, HKG, IDN, IND, IRL, ITA, JPN, KAZ, KGZ, KHM, LAO, LKA, LUX, MAR, MDG,  MOZ, MWI,
MYS, NGA, NIC, NLD, NOR, PAK, PER, PHL, PRT, PRY, ROU, SEN, SGP, SWE, THA, TUR, TWN, TZA, UGA, UKR, USA, VNM, ZMB, ZWE /;


set nogreen(r) / ALB, ARG, ARM, AZE, BGD, BGR, BLR, BOL, BRA, BWA, CHL, CHN, COL, CRI, CYP, CZE, DNK, ECU, EGY, EST, ETH,
GEO, GTM, HRV, HUN, IDN, IND, IRN, KAZ, KGZ, KHM, KOR, LAO, LKA, LTU, LUX, LVA, MAR, MDG, MEX, MLT,  MOZ, MUS, MWI,
MYS, NGA, NIC, NOR, NZL, PAK, PAN, PER, PHL, POL, PRY, ROU, RUS, SEN, SVK, SVN, THA, TUN, TUR, TZA, UGA, UKR, URY, USA,
VEN, VNM, ZAF, ZMB, ZWE /;



* trick to have r_ defined as "base" set - need that for bootstrapping, cant use ORD on an unstable set
execute_unload 'setr.gdx', rall,  red, blue, green, rskm;

$gdxin setr

* IF USING RED BLUE GREEN SETS, USE THIS PART OF THE CODE :
$if "%regsubset%"=="red" $load r_=red
$if "%regsubset%"=="blue" $load r_=blue
$if "%regsubset%"=="green" $load r_=green
$if "%regsubset%"=="rall" $load r_=rall
$if "%regsubset%"=="riq" $load r_=riq
$if "%regsubset%"=="rskm" $load r_=rskm
display r_;

* dropping intermediates YES NEED TO DO THIS
i_(intermediates) = no;
i_("coa") = yes;
i_("gas") = yes;
i_("p_c") = yes;
i_("coa") = yes;
i_("gas") = yes;
i_("ely") = yes;


* DROPPING DWELLINGS :
i_("dwe") = no;


display i,i_, intermediates;
alias(i_,j_);
alias(r_,s_);

* define new g set
g_(i_) = yes; g_("c") = yes; g_("g") = yes; g_("i") = yes;


parameter nbr, nbi;
nbr = card(r_);
nbi = card(i_);

display nbr, nbi;

display r_, i_;


* definine per-capita expenditure only on selected sectors:
w(r)$pop(r) = 10e8* sum(i_,fd(i_,r)) /pop(r);
x(i,r)$pop(r)  = 10e8 * fd(i,r) / pop(r);
indexp(i)  = sum(r_,fd(i,r_)) / sum((r_,i.local),fd(i,r_));



* ALT WEIGHING SCHEMES, CHOSE HERE:

*	 maximum exp share
indexp(i)  = smax(r_, x(i,r_)/w(r_));

logphi(i,r)$phi(i,r) =log(phi(i,r));


* ignoring observations with very low shares of coal and gas

parameter expshare, sectdrop;
expshare(i_,r_) = fd(i_,r_) / sum(i_.local,fd(i_,r_));

sectdrop(i_,r_) = 1;

*  DROP in reporting at least.. (matters for R2)
sectdrop(i_,r_) = 1;
sectdrop(i_,r_)$(NOT fd(i_,r_)) =0 ;


* compute some statistics
parameter       fittedPCexp     fitted per capita expenditure
                fittedexp       fitted expenditure
*                fittedexpshare  fitted expenditure share
                sstot total sum of squares
                rsquared
                nobs number of observations
                Fstat F-test statistic
                nbp number of parameters in model
                df degrees of freedom for Fstat
                sigma2hat estimated variance of regression
                modelselection;


* ----------------------------------------------
* for bootstrapping :

set boot /1*%itr%/;
option seed=081567;

parameter  wt(r)        weight in the objective
           bootcoef(boot,*,*)
           dim          number of ctries
           rdraw        random draw from the pool of ctries
           cardzz(r)    index on each observation
           wtchk        weight check
;
wt(r_)=1;

alias (r_,k,kk,z,zz);
cardzz(k) = ord(k);
dim=card(k);
display dim, cardzz;


* which country-sector pairs are missing
set missingx(i,r);

missingx(i_,r_) = yes;
missingx(i_,r_)$x(i_,r_) = no;

display missingx;


parameter fe_norm(i);

fe_norm(i) = 10*indexp(i);

fe_norm("pdr") = indexp("pdr") / 100;



set lowincome(r);
lowincome(r_)$(log(w(r_)) < 8.65) = 1;
set	se(i)	secondary energy        /coa,ely,gas, p_c/  ;

* ---------------------------------------------------------
* -- DEFINE MODEL :

variables sse_;

Positive variables       U_(r), theta_, delta_

 eta_(i) coefficient on ICP price index
b_  ;


VARIABLES rho_, nu_, x_, mu_(i), sigma_, epsilondiff_(i), fe_(i) the LOGGED fixed effect, commonrho_  ;

equations  obj_log, obj_consshare, obj_logweighted, budget, commontheta, icppriceindex, epsilon_fact, commondelta, est_theta, sigmadiff, positive_g_cstr, u_cstr, 
commonrho;


* -----------------------
* CLM ESTIMATION


* this is the CLM objective function
obj_logweighted.. sse_ =e=      sum((i_,r_)$x(i_,r_),
 sectdrop(i_,r_)* indexp(i_) *
 wt(r_) * sqr(log(x(i_,r_)) - (
 sigma_ * log(w(r_)) +
 mu_(i_)*logphi(i_,r_)  
 +
* Note: here we redefined g to include ** (1/1- sigma)  -- BUT, this means that the constraint on g (below), must include sigma.
* also, means we should have no non-negativity constraints on rho and nu
 (  fe_(i_)*fe_norm(i_) + rho_(i_)   * log(U_(r_)) - nu_(i_)  * log(U_(r_))*log(U_(r_)) )
)));

* constraint on mu
commontheta(i_) ..      mu_(i_)  =e= ( sigma_ -1) / theta_;

* not used:
commonrho(i_) ..      rho_(i_)  =e= rho_("obs") ;

positive_g_cstr(i_,R_) ..  ( rho_(i_) - 2* log(U_(R_)) * nu_(i_)) / (1-sigma_)  =g= 0 ;

* flex CLM budget:



* in logs 
budget(r_) ..   sum(i_, exp( 
 sigma_ * log(w(r_)) +
 mu_(i_)*logphi(i_,r_)  
 +
* redefine g to include sigma
* (1-sigma_) *
 (  fe_(i_)*fe_norm(i_) + rho_(i_)  * log(U_(r_)) - nu_(i_)  * log(U_(r_))*log(U_(r_)) )
)) =e= w(r_);

* initialize variables:
sse_.L = 0;
epsilondiff_.L(i_) = 0.25;
epsilondiff_.LO(i_) = 1E-4;
epsilondiff_.UP(i_) = 1e3;

* fix one epsilon_ to one (textiles: one that is not a service industry) :
* solution is indep of this choice (if none of the bounds are binding)
*epsilon_.FX("tex") =0.6;

* CLM: can on only normalize one of U and sigma

U_.LO(r_)= 1E-3;

U_.UP(r_)= 20;

U_.FX("USA")= 10;

U_.L(r_)= 10* w(R_)/w("usa");


* for general form
nu_.L(i_) = 0;
nu_.LO(i_) =  -1e4;
nu_.UP(i_) = 1e4;


* TURN OFF QUAD TERM:
*nu_.FX(i_) = 0;

rho_.L(i_) =1;
rho_.UP(i_) =1E4;
rho_.LO(i_) =-1E4;

* this still seems to matter:
b_.L =1/5;
b_.UP =10e7;

fe_.L(i_) = 0.5;
* !! results could sensitive to this number if binding
fe_.LO(i_) = -1E5;
fe_.UP(i_) = 1E3;

mu_.L(i_) =  0.1;
mu_.UP(i_) =  1e2;
mu_.LO(i_) =  -1e3;


sigma_.UP = 1e3;
* IMPOSE SIGMA > 1 and check whether the fit is better.
* HAVE TO CHANGE BOTH of the following:
sigma_.L = 3;
sigma_.LO = 1.1;

x_.LO(i_,R_) = 1e-6;
x_.UP(i_,R_) = 1e4;

eta_.L(i_) = 0;
theta_.L = 1;
eta_.UP(i_) =  1e10;

theta_.UP = 30;

* should be secondary energy only
set ene(i) / coa, gas, ely, p_c /;

parameter forstata;

forstata("fd",i_,r_) = x(i_,r_);
forstata("fd","all ene",r_) = sum(ene, x(ene,r_));
forstata("logpcfd",i_,r_)$x(i_,r_) = log(x(i_,r_));
forstata("logpcfd","all ene",r_) = log(forstata("fd","all ene",r_));
forstata("logPHI",i_,r_) = logphi(i_,r_);
forstata("logpci",i_,r_) = log(w(r_));
forstata("logpci","all ene",r_) = log(w(r_));
* this is total expenditure for the country (for potential weighting)
forstata("total exp",i_,r_) = expenditure(r_);
forstata("total exp","all ene",r_) = expenditure(r_);

parameter  lambda, mu,  sse, fe, theta, epsilon_scale, eta, avgmu;
theta=0;
epsilon_scale=0;
parameter specificationstats for reporting;

* -- define specification :
$if "%spec%"=="tc" eta_.FX(i_) = 0; model nlls /obj_%objective%, budget, positive_g_cstr /; 
$if "%spec%"=="tc" nbp("non-homoth")= card(r_) + 4*card(i_) + 1;


$if "%spec%"=="theta4" theta_.FX = 4; eta_.FX(i_) = 0; model nlls /obj_%objective%, commontheta , budget, positive_g_cstr /; nbp("non-homoth")= card(r_) + 2*card(i_) ;
$if "%spec%"=="theta4" nbp("non-homoth")= card(r_) + 3*card(i_) + 1 ;


* -----------------------

nlls.reslim = 100000;

* -- Solve model :

$ontext
* change solver tolerance ?
nlls.optfile = 1 ;
FILE  OPT   conopt option file  / conopt.OPT /;
PUT OPT;
PUT 'rtredg  3.0e-13' /
 'rtobjr 3.0e-14'/
'rtpiva 1.0e-14'/
'rtpivt 1.0e-12'
PUTCLOSE OPT;
$offtext


* make a savepoint for boostraping
nlls.savepoint=1;
solve nlls using nlp minimizing sse_;
nlls.savepoint=0;

$if "%spec%"=="theta4_noenergy" display sse_.l; 
$if "%spec%"=="theta4_noenergy" $exit

* calculate statistics
coeffs("Theta","coeff", i_) = theta_.L;
coeffs("Epsilon","coeff", i_) = sigma_.L + epsilondiff_.L(i_);
coeffs("Epsilondiff","coeff", i_) = epsilondiff_.L(i_);
coeffs("Sigma","coeff", i_) = sigma_.L;
coeffs("fixed effect","coeff", i_) = fe_.L(i_);
coeffs("rho","coeff", i_) = rho_.L(i_);
coeffs("nu","coeff", i_) = nu_.L(i_);
coeffs("mu","coeff", i_) = mu_.L(i_);


parameter U;
U(r_) = U_.L(r_);

forstata("fitted nh",i_,r_) = 
exp( 
 sigma_.L * log(w(r_)) +
 mu_.L(i_)*logphi(i_,r_)  
 +
 (  fe_.L(i_)*fe_norm(i_) + rho_.L(i_) * log(U_.L(r_)) - nu_.L(i_)  * log(U_.L(r_))*log(U_.L(r_)) )
);

parameter incelast, avgincelast;

* note: estimated rho and nu include the (1-sigma) term
forstata("incel",i_,r_)$x(i_,r_) = sigma_.L + 
  (1 - sigma_.L) * 
 (rho_.L(i_) - 2* nu_.L(i_) * log(U_.L(r_))) * sum(i_.local, x(i_,r_)) /
   sum(i_.local,  (rho_.L(i_) - 2* nu_.L(i_) * log(U_.L(r_))) * x(i_,r_));

* income elasticity with fitted shares:
forstata("incel fitted dem",i_,r_)$x(i_,r_) = sigma_.L + 
  (1 - sigma_.L) * 
 (rho_.L(i_) - 2* nu_.L(i_) * log(U_.L(r_))) * sum(i_.local,   forstata("fitted nh",i_,r_)      ) /
   sum(i_.local,  (rho_.L(i_) - 2* nu_.L(i_) * log(U_.L(r_))) * forstata("fitted nh",i_,r_) );



incelast("DIRECT",i_,r_) = forstata("incel",i_,r_);
incelast("DIRECT fitted dem",i_,r_) = forstata("incel fitted dem",i_,r_);


* weighted by final deman
avgincelast("direct","energy goods","all") = sum((r_,se), fd(se,r_) * IncElast("direct",se,r_)) /
	sum((r_,se), fd(se,r_) ) ;

avgincelast("direct","energy goods","lowincome") = sum((lowincome,se), fd(se,lowincome) * IncElast("direct",se,lowincome)) /
	sum((lowincome,se), fd(se,lowincome) ) ;


* elast of g to U
forstata("elast of g to U",i_,r_)$x(i_,r_) =  (rho_.L(i_) - 2* nu_.L(i_) * log(U_.L(r_)) ) /  (1-sigma_.L);

* drop gas and coal if not in the original data
forstata("fitted nh",i_,r_)$(not sectdrop(i_,r_)) = 0; 

parameter fittedexp; 
fittedexp("non-homoth",i_,r_) = forstata("fitted nh",i_,r_) / 10e8* pop(r_)  ;


* FOR REPORTING, CHOSE WHETHER TO USE WEIGHTED OR UNWEIGHTED R-SQUARED
parameter indexp_report;
*indexp_report(i_) = 1;
* if using  weighted:
indexp_report(i_) = indexp(i_);

sse_.L   = sum((i_,r_)$x(i_,r_), sectdrop(i_,r_) *  indexp_report(i_) *  sqr(log(x(i_,r_)) - log(forstata("fitted nh",i_,r_) )));

nobs = card(i_) * card(r_);

* calculate the average elasticity of expenditures w.r.t phi :
* weighted by sectors size
avgmu("weighted") = sum(i_, indexp_report(i_) * mu_.L(i_)) / sum(i_, indexp_report(i_));
avgmu("weighted - energy goods") = sum(se, indexp_report(se) * mu_.L(se)) / sum(se, indexp_report(se));
avgmu("unweighted") = sum(i_,   mu_.L(i_)) / card(i_);

* calculate weighted Rsquare for logweighted specification
parameter weightedsstot, weightedrsquared;

weightedrsquared = 0;
weightedsstot = 0;

sse("non-homoth") = sse_.L;
sstot = sum((i_,r_)$x(i_,r_), sectdrop(i_,r_) *  indexp_report(i_) *  sqr(log(x(i_,r_)) - sum((i_.local,r_.local)$x(i_,r_), log(x(i_,r_)))/nobs  ));
rsquared("nh","all","total") = 1 - sse_.L / sstot;
rsquared("nh","all",i_) = 1 -   sum((r_)$(x(i_,r_) and sectdrop(i_,r_)), sectdrop(i_,r_) *  indexp_report(i_) * sqr(log(forstata("fitted nh",i_,r_)) -  log(x(i_,r_))  ))
 / 
  sum((r_)$x(i_,r_), sectdrop(i_,r_) *  indexp_report(i_) * sqr(log(x(i_,r_)) - sum((r_.local)$x(i_,r_), log(x(i_,r_)))/card(r_)  ))
;


* by sector
rsquared("nh","lowincome",i_) = 1 -   sum((lowincome)$(x(i_,lowincome) and sectdrop(i_,lowincome)), sectdrop(i_,lowincome) *  indexp_report(i_) * sqr(log(forstata("fitted nh",i_,lowincome)) -  log(x(i_,lowincome))  ))
 / 
  sum((lowincome)$x(i_,lowincome), sectdrop(i_,lowincome) *  indexp_report(i_) *  sqr(log(x(i_,lowincome)) - sum((lowincome.local)$x(i_,lowincome), log(x(i_,lowincome)))/ (card(i_) * card(lowincome))  ))
;

* by sector
rsquared("nh","lowincome","total") = 1 -   sum((i_,lowincome)$(x(i_,lowincome) and sectdrop(i_,lowincome)), sectdrop(i_,lowincome) *  indexp_report(i_) * sqr(log(forstata("fitted nh",i_,lowincome)) -  log(x(i_,lowincome))  ))
 / 
  sum((i_,lowincome)$x(i_,lowincome), sectdrop(i_,lowincome) *  indexp_report(i_) *  sqr(log(x(i_,lowincome)) - sum((i_.local,lowincome.local)$x(i_,lowincome), log(x(i_,lowincome)))/ (card(i_) * card(lowincome))  ))
;


* energy goods only:
rsquared("nh","all","energy goods") = 1 -   sum((se,r_)$(x(se,r_) and sectdrop(se,r_)), sectdrop(SE,r_) *  indexp_report(SE) *  sqr(log(forstata("fitted nh",se,r_)) -  log(x(se,r_))  ))
 / 
  sum((se,r_)$x(se,r_), sectdrop(SE,r_) *  indexp_report(SE) *  sqr(log(x(se,r_)) - sum((se.local,r_.local)$x(se,r_), log(x(se,r_)))/ (card(se) * card(r_))  ))
;

modelselection("aic weighted","non-homoth") =  log(sse_.L /nobs) + 2* nbp("non-homoth") / nobs ;
* in paper use this measure
modelselection("bic weighted","non-homoth") =  log(sse_.L /nobs) + log(nobs)* nbp("non-homoth") /nobs ;

* only for export -- will not be used
parameter lambda, sigma_scale;

lambda("non-homoth", r_) = U_.L(r_) ;
sigma_scale("non-homoth") = 0 ;
coeffs("backed out theta","coeff",i_) = 1;



* now estimating Homothetic version = CES..

* here, can either fix epsilon-sigma, or fix U
* decide to keep U fixed, and just impose all epsilon to be the same (rho in the model) (to keep U's comparable in level)

* note: rho is epsilon -sigma

* -- HOMOTHETIC VERSION

* re-ininitialize variables

U_.L(r_) = 1;

eta_.L(i_) = 0;

rho_.L(i_) = -1;

*rho_.FX(i_) =1;

U_.LO(r_) = 0.0001;

U_.UP(r_) = 1000;

* TO BE CONSISTENT WITH OTHER FILE, SET U USA TO 10
U_.FX("USA") = 10;



rho_.UP(i_) = 1000;
*rho_.LO(i_) = 0.001;
rho_.LO(i_) = -1000;


sigma_.Lo = 1.5;

sigma_.L = 1.6;

nu_.FX(i_) = 0;


*re-solve with sigmas fixed at one
*theta_.UP = 1E10;
* with no constraint on mu :
$if "%spec%"=="tc" theta_.FX = 4; eta_.FX(i_) = 0; model nllshomoth /obj_%objective%,  budget, commonrho /; 
$if "%spec%"=="tc" nbp("homoth")= card(r_) + 2*card(i_) + 1 ;

$if "%spec%"=="theta4" theta_.FX = 4; eta_.FX(i_) = 0; model nllshomoth /obj_%objective%, commontheta , budget, commonrho, positive_g_cstr /; 
$if "%spec%"=="theta4" nbp("homoth")= card(r_) + card(i_) + 1;

* solve nllshomoth using nlp minimizing sse_;
* re-set to non-homoth for bootstrap


forstata("fitted h",i_,r_) = 
exp( 
 sigma_.L * log(w(r_)) +
 mu_.L(i_)*logphi(i_,r_)  
 +
 (  fe_.L(i_)*fe_norm(i_) + rho_.L(i_) * log(U_.L(r_)) - nu_.L(i_)  * log(U_.L(r_))*log(U_.L(r_)) )
);



* drop gas and coal if not in the original data
forstata("fitted h",i_,r_)$(not sectdrop(i_,r_)) = 0; 
fittedexp("homoth",i_,r_) = forstata("fitted h",i_,r_) / 10e8* pop(r_)  ;

*** for unweighted R2
sse_.L =  sum((i_,r_)$x(i_,r_), sectdrop(i_,r_) *  indexp_report(i_) *  sqr(log(x(i_,r_)) - log(forstata("fitted h",i_,r_) )));


modelselection("aic weighted","homoth") = log(sse_.L /nobs) + 2* nbp("homoth") / nobs;

modelselection("bic weighted","homoth") =  log(sse_.L /nobs) + log(nobs)* nbp("homoth") /nobs ;

* Rsquared
rsquared("h","all","total") = 1 - sse_.L / sstot;

* within sector
rsquared("h","all",i_) = 1 -   sum((r_)$(x(i_,r_) and sectdrop(i_,r_)), sectdrop(i_,r_) *  indexp_report(i_) * sqr(log(forstata("fitted h",i_,r_)) -  log(x(i_,r_))  ))
 / 
  sum((r_)$x(i_,r_),sectdrop(i_,r_) *  indexp_report(i_) *  sqr(log(x(i_,r_)) - sum((r_.local)$x(i_,r_), log(x(i_,r_)))/card(r_)  ))
;

* by sector
rsquared("H","lowincome",i_) = 1 -   sum((lowincome)$(x(i_,lowincome) and sectdrop(i_,lowincome)), sectdrop(i_,lowincome) *  indexp_report(i_) * sqr(log(forstata("fitted h",i_,lowincome)) -  log(x(i_,lowincome))  ))
 / 
  sum((lowincome)$x(i_,lowincome), sectdrop(i_,lowincome) *  indexp_report(i_) * sqr(log(x(i_,lowincome)) - sum((lowincome.local)$x(i_,lowincome), log(x(i_,lowincome)))/ (card(i_) * card(lowincome))  ))
;

* by sector
rsquared("h","lowincome","total") = 1 -   sum((i_,lowincome)$(x(i_,lowincome) and sectdrop(i_,lowincome)), sectdrop(i_,lowincome) *  indexp_report(i_) * sqr(log(forstata("fitted h",i_,lowincome)) -  log(x(i_,lowincome))  ))
 / 
  sum((i_,lowincome)$x(i_,lowincome), sectdrop(i_,lowincome) *  indexp_report(i_) * sqr(log(x(i_,lowincome)) - sum((i_.local,lowincome.local)$x(i_,lowincome), log(x(i_,lowincome)))/ (card(i_) * card(lowincome))  ))
;


* energy goods only:
rsquared("h","all","energy goods") = 1 -   sum((se,r_)$(x(se,r_) and sectdrop(se,r_)), sectdrop(SE,r_) *  indexp_report(SE) * sqr(log(forstata("fitted h",se,r_)) -  log(x(se,r_))  ))
 / 
  sum((se,r_)$x(se,r_), sectdrop(SE,r_) *  indexp_report(SE) * sqr(log(x(se,r_)) - sum((se.local,r_.local)$x(se,r_), log(x(se,r_)))/ (card(se) * card(r_))  ))
;


* compute partial Rsquared for NH version
sse("homoth") = sse_.L;

Parameter withinrsquare;

withinrsquare("all sectors","to homoth") = 1 - (sse("non-homoth") / sse("homoth"));
withinrsquare("energy goods","to homoth") = 1 -   sum((se,r_)$(x(se,r_) and sectdrop(se,r_)), sectdrop(SE,r_) *  indexp_report(SE) * sqr(log(forstata("fitted nh",se,r_)) -  log(x(se,r_))  ))
 /  sum((se,r_)$(x(se,r_) and sectdrop(se,r_)), sectdrop(SE,r_) *  indexp_report(SE) * sqr(log(forstata("fitted h",se,r_)) -  log(x(se,r_))  ))
;


withinrsquare(i_,"to homoth") = 1 -   sum((r_)$(x(i_,r_) and sectdrop(i_,r_)), sectdrop(i_,r_) *  indexp_report(i_) * sqr(log(forstata("fitted nh",i_,r_)) -  log(x(i_,r_))  ))
 /  sum((r_)$(x(i_,r_) and sectdrop(i_,r_)), sectdrop(i_,r_) *  indexp_report(i_) * sqr(log(forstata("fitted h",i_,r_)) -  log(x(i_,r_))  ))
;






* F-TESTS

*  this is restriction of all NH terms:
*fstat("sigma=1") = ((sse("homoth") - sse("non-homoth")) / (nbp("non-homoth") - nbp("homoth"))) / (sse("non-homoth") / ( nobs - nbp("non-homoth"))) ;

*  Significance of the rho's
fstat("rho=1") = ((sse("homoth") - sse("non-homoth")) / (nbp("non-homoth") - nbp("homoth"))) / (sse("non-homoth") / ( nobs - nbp("non-homoth"))) ;

* 
* for energy goods only: compute model with sigma = 1 for energy, get SSE and paste it here:
* run tc_noenergy specification
fstat("sigma=1 energy true") = ((708   - sse("non-homoth")) / (4)) / (sse("non-homoth") / ( nobs - nbp("non-homoth"))) ;
* so we need to compare to F(4,5002) .. at 5% significance level, that is about 1.9
* according to http://www.socscistatistics.com/pvalues/fdistribution.aspx  highly signif <0.00001

* alt computation because "true" doesn't work in the quadratic version
set nonene(i);
nonene(i_) = yes;
nonene(ene) = no;


parameter sse_noenergy;
sse_noenergy   = sum((ene,r_)$x(ene,r_), sectdrop(ene,r_) *  indexp_report(ene) *  sqr(log(x(ene,r_)) - log(forstata("fitted h",ene,r_) ))) +
sum((nonene,r_)$x(nonene,r_), sectdrop(nonene,r_) *  indexp_report(nonene) *  sqr(log(x(nonene,r_)) - log(forstata("fitted nh",nonene,r_) )));

* approx
fstat("sigma=1 energy") = ((sse_noenergy   - sse("non-homoth")) / (8)) / (sse("non-homoth") / ( nobs - nbp("non-homoth"))) ;

* fstat of the quadratic term
* this is complicated: in CLM file, compute SSEenergyonly and copy it here
parameter donald;
donald= 42.5 +
sum((nonene,r_)$x(nonene,r_), sectdrop(nonene,r_) *  indexp_report(nonene) *  sqr(log(x(nonene,r_)) - log(forstata("fitted nh",nonene,r_) )));

fstat("mu=1 energy") = ((donald - sse("non-homoth")) / (4  )) / (sse("non-homoth") / ( nobs - nbp("non-homoth"))) ;



* FOR HOMONOTC USING THE CRIE ESTIMATE! SHOULD BE THE SAME..
withinrsquare("to homoth notc","all") = 1 - (sse("non-homoth") /
964
);

* Signficicance of the NU's
* REQUIRES MANUAL IMPUTING OF SSE FROM THE DEMAND_CLM_T4.GMS FILE
**********
SSE("MU=0") =   702.749944433499;
nbp("MU=0") = nbp("non-homoth") - card(i_);

fstat("mu=1") = ((sse("MU=0") - sse("non-homoth")) / (nbp("non-homoth") - nbp("MU=0"))) / (sse("non-homoth") / ( nobs - nbp("non-homoth"))) ;
* will have an F distribution, with (p2-p1, n-p2) = 1.23

*fstat("commontheta")$(nbp("TC imp") - nbp("non-homoth")) = ((sse("non-homoth") - sse("TC imp")) / (nbp("TC imp") - nbp("non-homoth"))) / (sse("TC imp") / ( nobs - nbp("TC imp"))) ;

specificationstats("sigma") = sigma_.L;
specificationstats("avg incel energy goods") = avgincelast("direct","energy goods","all"); 
specificationstats("avg incel energy goods - low income ctries") = avgincelast("direct","energy goods","lowincome") ;
specificationstats("avgmu - weighted") = avgmu("weighted");
specificationstats("avgmu - weighted - energy goods") = avgmu("weighted - energy goods");
specificationstats("F-stat rho=1") = fstat("rho=1") ;
specificationstats("F-stat mu=1") = fstat("mu=1") ;
specificationstats("R2") = rsquared("nh","all","total");
specificationstats("R2 energy goods") = rsquared("nh","all","energy goods");
specificationstats("within R2") = withinrsquare("all sectors","to homoth") ;
specificationstats("within R2 energy goods") = withinrsquare("energy goods","to homoth");
specificationstats("aic weighted") = modelselection("aic weighted","non-homoth");
specificationstats("bic weighted") = modelselection("bic weighted","non-homoth");
specificationstats("p") = nbp("non-homoth");
specificationstats("n") = nobs;
specificationstats("sse unweighted") = sse("non-homoth");
specificationstats("sstot") = sstot;
specificationstats("avgmu - unweighted") = avgmu("unweighted") ;
specificationstats("bic unweighted") = modelselection("bic unweighted","non-homoth");
specificationstats("theta") = theta;


execute_unload 'estimates%SLASH%estimates_%ds%_%objective%_%spec%_CLM_flex_quadratic.gdx'     coeffs, U, sigma_, epsilondiff_, forstata, weightedrsquared, b_, rho_, nu_, fe_, mu_, ex, im, cst, lambda, sigma_scale, fittedexp, sectdrop, expshare, rsquared, incelast, withinrsquare, sse, modelselection, nbp, specificationstats, fstat;

